f_dmrs <- fread("/home1/NEURO/SHARE_DECIPHER/DMRs/TERRE/sig.cpgs.TERRE.F.HT.ancestry.csv")[abs(meanbetafc) >= 0.03]
m_dmrs <- fread("/home1/NEURO/SHARE_DECIPHER/DMRs/TERRE/sig.cpgs.TERRE.M.HT.ancestry.csv")[abs(meanbetafc) >= 0.03]
males_pd <- rbindlist(
lapply(
Sys.glob("male_terre_pd_f_*_dnam_breakdown.txt.gz"),
function(f) fread(f, fill = TRUE)),
fill = TRUE
)[cpg %in% m_dmrs$cpg]
males_pd[, `:=`(f_q=p.adjust(f_p, method = "BH")), by = c("model", "env")]
females_pd <- rbindlist(
lapply(
Sys.glob("female_terre_pd_f_*_dnam_breakdown.txt.gz"),
function(f) fread(f, fill = TRUE)),
fill = TRUE)[cpg %in% f_dmrs$cpg]
females_pd[, `:=`(f_q=p.adjust(f_p, method = "BH")), by = c("model", "env")]
Filtering data by:
males_pd_sig <- males_pd[f_q < 0.05]
females_pd_sig <- females_pd[f_q < 0.05]
aic ranking
males_aic <- males_pd_sig[,.SD[which.min(aic)],by="cpg"]
females_aic <- females_pd_sig[,.SD[which.min(aic)],by="cpg"]
males_aic[m_dmrs,on=.(cpg)]
females_aic[f_dmrs,on=.(cpg)]
males_aic$Sex <- "Male"
females_aic$Sex <- "Female"
ggplot(rbind(males_aic,females_aic), aes(model,fill=Sex)) +
geom_bar(position="dodge")+
geom_text(stat="count",aes(label = ..count..),position=position_dodge(width=0.9),vjust=0)+
scale_fill_manual(values=c("Male"="lightblue2","Female"="pink"))+
theme_minimal()
f_dmrs$Sex <- "Female"
m_dmrs$Sex <- "Male"
all_dmr <- rbind(m_dmrs,f_dmrs)
all_aic <- rbind(males_aic, females_aic)
to_plot <- all_aic[all_dmr,on=.(cpg,Sex),nomatch=0]
ggplot(to_plot[!duplicated(paste0(dmr,model,Sex))], aes(model,fill=Sex)) +
geom_bar(position="dodge")+
geom_text(stat="count",aes(label = ..count..),position=position_dodge(width=0.9),vjust=0)+
scale_fill_manual(values=c("Male"="lightblue2","Female"="pink"))+
theme_minimal()
Take all significant models, compare G vs E vs GxE effect sizes:
males_pd_sig[,.(.N,uniqueN(cpg)),by="model"]
females_pd_sig[,.(.N,uniqueN(cpg)),by="model"]
plt <- VennDiagram::venn.diagram(
lapply(split(males_pd_sig,by="model"),function(dt)dt$cpg),
filename=NULL
)
grid::grid.newpage()
grid::grid.draw(plt)
plt <- VennDiagram::venn.diagram(
lapply(split(females_pd_sig,by="model"),function(dt)dt$cpg),
filename=NULL
)
grid::grid.newpage()
grid::grid.draw(plt)
plt <- VennDiagram::venn.diagram(
lapply(split(males_pd_sig,by="model"),function(dt)dt$SNP),
filename=NULL
)
grid::grid.newpage()
grid::grid.draw(plt)
plt <- VennDiagram::venn.diagram(
lapply(split(females_pd_sig,by="model"),function(dt)dt$SNP),
filename=NULL
)
grid::grid.newpage()
grid::grid.draw(plt)
library(ggpubr)
gxe_males_pd <- males_pd[model=="GxE"]
gxe_males_pd[, `:=`(G_q=p.adjust(Gp,method="fdr"),E_q=p.adjust(Ep,method="fdr"),GxE_q=p.adjust(GxEp,method="fdr")),by=c("env")]
gxe_males_pd <- gxe_males_pd[f_q < 0.05]
gxe_males_pd[,.(uniqueN(cpg[G_q < 0.05]),uniqueN(cpg[E_q < 0.05]),uniqueN(cpg[GxE_q < 0.05]))]
gxe_male_sig_G_count <- table(gxe_males_pd[G_q < 0.05][,.(cpg,env)]) # counts of SNPs with significant effects per cpg,env
gxe_males_pd[G_q < 0.05][!duplicated(cpg)]
sig_male_e <- gxe_males_pd[E_q < 0.05]
sig_male_e[!duplicated(sig_male_e[,.(env,cpg)])]
gxe_females_pd <- females_pd[model=="GxE"]
gxe_females_pd[, `:=`(G_q=p.adjust(Gp,method="fdr"),E_q=p.adjust(Ep,method="fdr"),GxE_q=p.adjust(GxEp,method="fdr")),by=c("env")]
gxe_females_pd <- gxe_females_pd[f_q < 0.05]
gxe_females_pd[,.(uniqueN(cpg[G_q < 0.05]),uniqueN(cpg[E_q < 0.05]),uniqueN(cpg[GxE_q < 0.05]))]
gxe_female_sig_G_count <- table(gxe_females_pd[G_q < 0.05][,.(cpg,env)]) # counts of SNPs with significant effects per cpg,env
gxe_females_pd[G_q < 0.05][!duplicated(cpg)]
gxe_females_pd[E_q < 0.05]
NA
ggboxplot(
melt(gxe_males_pd[,.SD[which.min(aic)],by="cpg"][,.(G=abs(Gest),E=abs(Eest),GxE=abs(GxEest))],value.name="Estimate",variable.name = "Coefficient"),x = "Coefficient",y="Estimate",outlier.shape = NA,fill = "lightblue2") +
coord_cartesian(ylim=c(0,1)) +
stat_compare_means(
paired=TRUE,label.y = -0.5,method.args = list(alternative ="g"),tip.length = 0.01, step.increase = 0.012,
comparisons = list(c("G","E"),c("G","GxE"),c("E","GxE")))
id.vars and measure.vars are internally guessed when both are 'NULL'. All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns []. Consider providing at least one of 'id' or 'measure' vars in future.
compare_means(
Estimate~Coefficient,paired=TRUE,
melt(gxe_males_pd[,.SD[which.min(aic)],by="cpg"][,.(G=abs(Gest),E=abs(Eest),GxE=abs(GxEest))],value.name="Estimate",variable.name = "Coefficient"),
alternative="l",method = "wilcox")
id.vars and measure.vars are internally guessed when both are 'NULL'. All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns []. Consider providing at least one of 'id' or 'measure' vars in future.
kruskal.test(melt(gxe_males_pd[,.SD[which.min(aic)],by="cpg"][,.(G=abs(Gest),E=abs(Eest),GxE=abs(GxEest))],value.name="Estimate",variable.name = "Coefficient"))
id.vars and measure.vars are internally guessed when both are 'NULL'. All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns []. Consider providing at least one of 'id' or 'measure' vars in future.some elements of 'x' are not numeric and will be coerced to numeric
Kruskal-Wallis rank sum test
data: melt(gxe_males_pd[, .SD[which.min(aic)], by = "cpg"][, .(G = abs(Gest), E = abs(Eest), GxE = abs(GxEest))], value.name = "Estimate", variable.name = "Coefficient")
Kruskal-Wallis chi-squared = 200.47, df = 1, p-value < 2.2e-16
ggboxplot(
melt(gxe_females_pd[,.SD[which.min(aic)],by="cpg"][,.(G=abs(Gest),E=abs(Eest),GxE=abs(GxEest))],value.name="Estimate",variable.name = "Coefficient"),x = "Coefficient",y="Estimate",outlier.shape=NA,fill = "pink") +
coord_cartesian(ylim=c(0,1.5)) +
stat_compare_means(paired=TRUE,label.y = -14.,method.args = list(alternative ="l"),tip.length = 0.0005,step.increase=0.0006,comparisons = list(c("G","E"),c("G","GxE"),c("E","GxE")))
id.vars and measure.vars are internally guessed when both are 'NULL'. All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns []. Consider providing at least one of 'id' or 'measure' vars in future.
compare_means(Estimate~Coefficient,paired=TRUE,melt(gxe_females_pd[,.SD[which.min(aic)],by="cpg"][,.(G=abs(Gest),E=abs(Eest),GxE=abs(GxEest))],value.name="Estimate",variable.name = "Coefficient"),alternative="g")
id.vars and measure.vars are internally guessed when both are 'NULL'. All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns []. Consider providing at least one of 'id' or 'measure' vars in future.
kruskal.test(melt(gxe_females_pd[,.SD[which.min(aic)],by="cpg"][,.(G=abs(Gest),E=abs(Eest),GxE=abs(GxEest))],value.name="Estimate",variable.name = "Coefficient"))
id.vars and measure.vars are internally guessed when both are 'NULL'. All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns []. Consider providing at least one of 'id' or 'measure' vars in future.some elements of 'x' are not numeric and will be coerced to numeric
Kruskal-Wallis rank sum test
data: melt(gxe_females_pd[, .SD[which.min(aic)], by = "cpg"][, .(G = abs(Gest), E = abs(Eest), GxE = abs(GxEest))], value.name = "Estimate", variable.name = "Coefficient")
Kruskal-Wallis chi-squared = 617.77, df = 1, p-value < 2.2e-16
gxe_males_pd[,`:=`(Zge = (abs(Gest) - abs(Eest))/sqrt(Gse^2 + Ese^2))]
gxe_males_pd[,`:=`(Zp = 2*(pnorm(-abs(Zge))))]
gxe_males_pd[,`:=`(Z_q = p.adjust(Zp,method="fdr")),by=c("env")]
gxe_males_pd[,`:=`(Zgxe = (abs(Gest) - abs(GxEest))/sqrt(Gse^2 + GxEse^2))]
gxe_males_pd[,`:=`(Zgxep = 2*(pnorm(-abs(Zgxe))))]
gxe_males_pd[,`:=`(Zgxe_q = p.adjust(Zgxep,method="fdr")),by=c("env")]
gxe_males_pd[,`:=`(Zegxe = (abs(Eest) - abs(GxEest))/sqrt(Ese^2 + GxEse^2))]
gxe_males_pd[,`:=`(Zegxep = 2*(pnorm(-abs(Zegxe))))]
gxe_males_pd[,`:=`(Zegxe_q = p.adjust(Zegxep,method="fdr")),by=c("env")]
gxe_females_pd[,`:=`(Zge = (abs(Gest) - abs(Eest))/sqrt(Gse^2 + Ese^2))]
gxe_females_pd[,`:=`(Zp = 2*(pnorm(-abs(Zge))))]
gxe_females_pd[,`:=`(Z_q = p.adjust(Zp,method="fdr")),by=c("env")]
gxe_females_pd[,`:=`(Zgxe = (abs(Gest) - abs(GxEest))/sqrt(Gse^2 + GxEse^2))]
gxe_females_pd[,`:=`(Zgxep = 2*(pnorm(-abs(Zgxe))))]
gxe_females_pd[,`:=`(Zgxe_q = p.adjust(Zgxep,method="fdr")),by=c("env")]
gxe_females_pd[,`:=`(Zegxe = (abs(Eest) - abs(GxEest))/sqrt(Ese^2 + GxEse^2))]
gxe_females_pd[,`:=`(Zegxep = 2*(pnorm(-abs(Zegxe))))]
gxe_females_pd[,`:=`(Zegxe_q = p.adjust(Zegxep,method="fdr")),by=c("env")]
gxe_males_pd[Z_q < 0.05,.(hits=uniqueN(cpg),paste0(unique(cpg),collapse=",")),by="env"]
gxe_females_pd[Z_q < 0.05,.(hits=uniqueN(cpg),paste0(unique(cpg),collapse=",")),by="env"]
gxe_males_pd[Z_q < 0.05]
gxe_females_pd[Z_q < 0.05]
unique(gxe_males_pd[Z_q < 0.05]$cpg)
[1] "cg11381759" "cg23034985" "cg05184729" "cg05903289" "cg05962382" "cg22307029" "cg24324837" "cg05388281" "cg10197305" "cg12717203" "cg14192029" "cg19819404" "cg20320494"
[14] "cg21149944" "cg22831726" "cg10464773" "cg18403080" "cg18405330"
unique(gxe_females_pd[Z_q < 0.05]$cpg)
[1] "cg00687962" "cg18041640" "cg25243082"
m_dmrs[cpg %in% unique(gxe_males_pd[Z_q < 0.05]$cpg)]
f_dmrs[cpg %in% unique(gxe_females_pd[Z_q < 0.05]$cpg)]
gxe_males_pd[Zgxe_q < 0.05,.(hits=uniqueN(cpg),paste0(unique(cpg),collapse=",")),by="env"]
gxe_females_pd[Zgxe_q < 0.05,.(hits=uniqueN(cpg),paste0(unique(cpg),collapse=",")),by="env"]
gxe_males_pd[Zgxe_q < 0.05]
gxe_females_pd[Zgxe_q < 0.05]
unique(gxe_males_pd[Zgxe_q < 0.05]$cpg)
[1] "cg11381759" "cg23034985" "cg02645135" "cg02612332" "cg08433999" "cg10507346" "cg07278634" "cg08285289" "cg05903289" "cg05962382" "cg22307029" "cg24324837" "cg07594247"
[14] "cg07772999" "cg11763394" "cg17445212" "cg19083407" "cg21482265" "cg21550016" "cg23122642" "cg05388281" "cg10197305" "cg12717203" "cg14192029" "cg19819404" "cg20320494"
[27] "cg21149944" "cg22831726" "cg04265523" "cg10464773" "cg18403080" "cg18405330"
unique(gxe_females_pd[Zgxe_q < 0.05]$cpg)
[1] "cg09767236" "cg09499504" "cg14341378" "cg17662445" "cg06699216" "cg00687962" "cg04399643" "cg18041640" "cg25243082" "cg05841700" "cg07157834" "cg07167872" "cg07533224"
[14] "cg11965913" "cg12898220" "cg14159672" "cg14893161" "cg16334093" "cg17178900" "cg24503407" "cg26354017" "cg05432213" "cg20803293" "cg20331612" "cg26191422" "cg16787483"
[27] "cg12045875" "cg00290607" "cg14500267" "cg14942906" "cg23188684" "cg24690094" "cg16786756" "cg01485177" "cg24598948" "cg00588198" "cg03198009" "cg03449857" "cg04071440"
[40] "cg07134666" "cg08022281" "cg10648573" "cg11383134" "cg11747594" "cg12644888" "cg13835168" "cg15570656" "cg15708526" "cg16885113" "cg19636627" "cg20228636" "cg22494932"
[53] "cg24100841" "cg25699073" "cg25978138"
m_dmrs[cpg %in% unique(gxe_males_pd[Zgxe_q < 0.05]$cpg)]
f_dmrs[cpg %in% unique(gxe_females_pd[Zgxe_q < 0.05]$cpg)]
gxe_males_pd[Zegxe_q < 0.05,.(hits=uniqueN(cpg),paste0(unique(cpg),collapse=",")),by="env"]
gxe_females_pd[Zegxe_q < 0.05,.(hits=uniqueN(cpg),paste0(unique(cpg),collapse=",")),by="env"]
gxe_males_pd[Zegxe_q < 0.05]
gxe_females_pd[Zegxe_q < 0.05]
unique(gxe_males_pd[Zegxe_q < 0.05]$cpg)
character(0)
unique(gxe_females_pd[Zegxe_q < 0.05]$cpg)
character(0)
m_dmrs[cpg %in% unique(gxe_males_pd[Zegxe_q < 0.05]$cpg)]
f_dmrs[cpg %in% unique(gxe_females_pd[Zegxe_q < 0.05]$cpg)]
ggboxplot(rbindlist(list(Male=gxe_males_pd[,.(Zge)],Female=gxe_females_pd[,.(Zge)]),idcol="Sex"),x="Sex",y="Zge",fill="Sex") +
scale_fill_manual(values=c(Male = "lightblue2", Female="pink")) +
stat_compare_means()
ggboxplot(rbindlist(list(Male=gxe_males_pd[,.(Zgxe)],Female=gxe_females_pd[,.(Zgxe)]),idcol="Sex"),x="Sex",y="Zgxe",fill="Sex") +
scale_fill_manual(values=c(Male = "lightblue2", Female="pink")) +
stat_compare_means()
ggboxplot(rbindlist(list(Male=gxe_males_pd[,.(Zegxe)],Female=gxe_females_pd[,.(Zegxe)]),idcol="Sex"),x="Sex",y="Zegxe",fill="Sex") +
scale_fill_manual(values=c(Male = "lightblue2", Female="pink")) +
stat_compare_means()